package com.shz.appletsapi.service.webmagic.mine;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;

/**
 * WebMagic {@link PageProcessor} that crawls www.feixiaohao.com and publishes a
 * cleaned-up copy of each page's HTML.
 *
 * <p>Every processed page yields two result fields: {@code "html"} (the
 * rewritten markup) and {@code "name"} (a relative name for the page:
 * {@code index}, {@code rvn}, {@code bcx}, {@code notice/<id>} or
 * {@code currencies/<id>}). Pages that produce nothing are marked as skipped so
 * pipelines are not invoked with empty results.
 *
 * <p>The instance keeps two plain, unsynchronized boolean flags, so links are
 * harvested only from the first page processed and the home page is published
 * at most once per instance.
 *
 * <p>Typical usage:
 * <pre>
 * Spider.create(new OnfxhHtmlPageProcessor())
 *         .addUrl("https://www.feixiaohao.com/")
 *         .addPipeline(new OnfxhHtmlPipeline())
 *         .thread(1)
 *         .run();
 * </pre>
 */
public class OnfxhHtmlPageProcessor implements PageProcessor {

	private static final String HOME_URL = "https://www.feixiaohao.com/";
	private static final String RVN_LIST_URL = "https://www.feixiaohao.com/list_2.html";
	private static final String BCX_LIST_URL = "https://www.feixiaohao.com/list_19.html";
	private static final String NOTICE_URL_PREFIX = "https://www.feixiaohao.com/notice/";
	private static final String CURRENCY_URL_PREFIX = "https://www.feixiaohao.com/currencies/";

	/** URL regexes used to route a page to its handler. */
	private static final String URL = ".*#.*";
	private static final String URL_RVN = ".*list_2.*";
	private static final String URL_BCX = ".*list_19.*";
	private static final String URL_NOTICE = ".*notice.*";

	private static final String SOURCE_BRAND = "非小号";
	private static final String TARGET_BRAND = "拾荒者";
	private static final String BODY_TAG = "<body>";
	private static final String TINTED_BODY_TAG = "<body style=\"background:#B0E2FF\">";
	private static final String NEUTRAL_ANCHOR = "<a href=\"javascript:void(0);\"";

	// Patterns are compiled once; Pattern instances are immutable and reusable.
	private static final Pattern NAV_BAR =
			ci("<div class=\"new-nav\">[\\s\\S]*?<div class=\"m120\"></div>");
	/** Home-page footer: consumes the complete opening tag of the "totop" div. */
	private static final Pattern HOME_FOOTER =
			ci("<div class=\"footer\">[\\s\\S]*?<div class=\"totop\">");
	/** Footer on other pages: stops before the closing '>' of the "totop" opening tag. */
	private static final Pattern PAGE_FOOTER =
			ci("<div class=\"footer\">[\\s\\S]*?<div class=\"totop\"");
	/** A table cell that contains only digits. */
	private static final Pattern NUMERIC_CELL = ci("<td>\\d+</td>");
	private static final Pattern ANCHOR_HREF = ci("<a href=\"[\\s\\S]*?\"");

	/** Fragments removed from the home page, applied in this order. */
	private static final Pattern[] HOME_REMOVALS = {
			NAV_BAR,
			HOME_FOOTER,
			ci("<div class=\"new-slide-btn\">[\\s\\S]*?</div>"),
			ci("<h1 class=\"new-cell-tit active\">[\\s\\S]*?自选币</a>"),
			ci("<div class=\"new-slide-btn unit\">[\\s\\S]*?</div>"),
			ci("<div class=\"new-page-list\">[\\s\\S]*?</div>"),
			ci("<div class=\"new-side-box\">[\\s\\S]*?<ul class=\"new-linksList\"></ul>"),
			ci("<th class=\"th-num\">[\\s\\S]*?</th>"),
			NUMERIC_CELL
	};

	/** Fragments removed from notice pages, applied in this order. */
	private static final Pattern[] NOTICE_REMOVALS = {
			NAV_BAR,
			PAGE_FOOTER,
			ci("<div class=\"boxTit\">[\\s\\S]*?</div>"),
			ci("<ul class=\"artList\" [\\s\\S]*?</ul>")
	};

	/** Fragments removed from detail pages, applied in this order. */
	private static final Pattern[] DETAIL_REMOVALS = {
			NAV_BAR,
			PAGE_FOOTER,
			ci("<div class=\"addto disactive\"[\\s\\S]*?</div>"),
			ci("<div class=\"box box325\"[\\s\\S]*?<div class=\"top\"></div>"),
			ci("<a class=\"more\"[\\s\\S]*?</a>"),
			ci("<a class=\"seeAll\"[\\s\\S]*?</a>")
	};

	private final Site site = Site.me().setRetryTimes(3).setSleepTime(1000).setTimeOut(10 * 1000);

	/** True until the first processed page has had its links harvested. */
	private boolean linksPending = true;
	/** True until the home page has been published once. */
	private boolean homePending = true;

	/**
	 * Queues follow-up requests and publishes the cleaned HTML of the page.
	 *
	 * <p>The two fixed list pages are requested on every call; table and sidebar
	 * links are collected only from the first page this instance processes. The
	 * page is then routed by URL: home (or any URL containing '#'), the RVN list,
	 * the BCX list, notice pages, and everything else as a detail page.
	 *
	 * @param page the downloaded page
	 */
	@Override
	public void process(Page page) {
		List<String> tableLinks = null;
		List<String> sidebarLinks = null;
		if (linksPending) {
			tableLinks = page.getHtml().xpath("//tbody//a").links().all();
			sidebarLinks = page.getHtml().xpath("//div[@class='new-side-box']/ul//a").links().all();
			linksPending = false;
		}

		// Fixed list pages are queued first, then the harvested links.
		page.addTargetRequest(RVN_LIST_URL);
		page.addTargetRequest(BCX_LIST_URL);
		if (tableLinks != null) {
			page.addTargetRequests(tableLinks);
			page.addTargetRequests(sidebarLinks);
		}

		if (page.getUrl().regex(URL).match() || page.getUrl().get().equals(HOME_URL)) {
			processHome(page);
		} else if (page.getUrl().regex(URL_RVN).match()) {
			processCoinRow(page, "ravencoin", "rvn");
		} else if (page.getUrl().regex(URL_BCX).match()) {
			processCoinRow(page, "bcx", "bcx");
		} else if (page.getUrl().regex(URL_NOTICE).match()) {
			processNotice(page);
		} else {
			processDetail(page);
		}
	}

	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Removes the navigation bar and footer from a page and swaps the brand name.
	 *
	 * @param h the parsed page
	 * @return the rewritten HTML
	 */
	public String gxhDetail(Html h) {
		String cleaned = removeAll(h.get(), NAV_BAR, PAGE_FOOTER);
		return cleaned.replace(SOURCE_BRAND, TARGET_BRAND);
	}

	/** Publishes the home page as "index" once; later home-like pages are skipped. */
	private void processHome(Page page) {
		if (!homePending) {
			// Nothing to publish: keep pipelines from receiving an empty result.
			page.setSkip(true);
			return;
		}
		System.out.println("-------------首页" + page.getUrl());
		String cleaned = removeAll(page.getHtml().get(), HOME_REMOVALS);
		cleaned = cleaned.replace(SOURCE_BRAND, TARGET_BRAND);
		// Literal replace: the '.' in the script name must not act as a regex wildcard.
		cleaned = cleaned.replace("fxh3.js", "");
		page.putField("html", cleaned);
		page.putField("name", "index");
		homePending = false;
	}

	/**
	 * Publishes the table row with the given id from a list page and queues the
	 * link found in the row's second cell.
	 *
	 * @param page the list page
	 * @param rowId the {@code id} attribute of the {@code <tr>} to extract
	 * @param name the value published in the "name" field
	 */
	private void processCoinRow(Page page, String rowId, String name) {
		String rowXpath = "//tr[@id='" + rowId + "']";
		String rowHtml = page.getHtml().xpath(rowXpath).get();
		String detailLink = page.getHtml().xpath(rowXpath + "/td[2]/a").links().get();
		if (rowHtml == null) {
			// Row not present on this page: skip instead of failing on a null input.
			page.setSkip(true);
			return;
		}
		if (detailLink != null) {
			page.addTargetRequest(detailLink);
		}
		page.putField("html", removeAll(rowHtml, NUMERIC_CELL));
		page.putField("name", name);
	}

	/** Publishes a notice page as "notice/&lt;id&gt;", where id is the file name without ".html". */
	private void processNotice(Page page) {
		System.out.println("-------------新闻页" + page.getUrl());
		String cleaned = removeAll(page.getHtml().get(), NOTICE_REMOVALS);
		cleaned = cleaned.replace(BODY_TAG, TINTED_BODY_TAG);

		String id = page.getUrl().get().replace(NOTICE_URL_PREFIX, "").replace(".html", "");
		page.putField("html", cleaned);
		page.putField("name", "notice/" + id.trim());
	}

	/** Publishes any other page as "currencies/&lt;id&gt;" with every anchor href neutralised. */
	private void processDetail(Page page) {
		System.out.println("-------------详情页" + page.getUrl());
		String cleaned = removeAll(page.getHtml().get(), DETAIL_REMOVALS);
		// Point every anchor at a no-op target so the saved copy has no outbound links.
		cleaned = ANCHOR_HREF.matcher(cleaned).replaceAll(NEUTRAL_ANCHOR);

		cleaned = cleaned.replace(SOURCE_BRAND, TARGET_BRAND);
		cleaned = cleaned.replace(BODY_TAG, TINTED_BODY_TAG);
		cleaned = cleaned.replace("查看全部", "");
		// Literal replaces: these are file names, not regular expressions.
		cleaned = cleaned.replace("coinrank.js", "");
		cleaned = cleaned.replace("coinoutlink.js", "");
		cleaned = cleaned.replace("hischarts_new.js", "");

		String id = page.getUrl().get().replace(CURRENCY_URL_PREFIX, "").replace("/", "");
		page.putField("html", cleaned);
		page.putField("name", "currencies/" + id.trim());
	}

	/** Deletes every match of each pattern from the text, applying the patterns in order. */
	private static String removeAll(String html, Pattern... patterns) {
		String result = html;
		for (Pattern pattern : patterns) {
			result = pattern.matcher(result).replaceAll("");
		}
		return result;
	}

	/** Compiles a case-insensitive pattern. */
	private static Pattern ci(String regex) {
		return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
	}
}
